Supplementary Materials I

First we provide an interactive map of the sites location by protection level to have a better view of Figure 2 in main manuscript.

Code
tomap <- fish %>% 
  group_by(protection, reef) %>% 
  summarise(latitude = mean(latitude), longitude = mean(longitude)) %>% 
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) 
mapview::mapview(tomap, zcol = "protection")

Biomass increased in Cabo Pulmo since the 1998 data baseline. Thus, it provides a good study case to understand how the NRSI works. First, we extract only sites that were monitored in 1998 and 1999 and in the following years from 2009 onward.

Code
cp_sites <- fish %>% 
  filter(region == "Cabo Pulmo") %>% 
  filter(year < 2001) %>% 
  pull(reef) %>% unique()

Then we compare those sites with others from the La Paz region, which are under a Lighly Protected area.

Code
more_sites <- c(cp_sites, "ESPIRITU_SANTO_ISLOTES_NORTE", "ESPIRITU_SANTO_BALLENA", "ESPIRITU_SANTO_PUNTA_LOBOS")

library(Hmisc)

# Calculate the mean biomass and standard error
summary_data <- fish %>% 
  filter(reef %in% more_sites) %>% 
  group_by(year, region, reef, depth2, transect) %>% 
  summarise(biomass = sum(biomass)) %>% 
  group_by(year, region, reef) %>% 
  summarise(
    biomass_mean = mean(biomass),
    biomass_se = sd(biomass) / sqrt(n())  # Standard error
  )

# Plot with error bars
ggplot(summary_data, aes(x=year, y=biomass_mean, col=reef, group=reef)) +
  geom_point() + 
  geom_errorbar(aes(ymin=biomass_mean - biomass_se, ymax=biomass_mean + biomass_se), width=0.2) +
  facet_grid(~region) +
  labs(y = "Mean Biomass", title = "Mean Biomass with Standard Error")

Cabo Pulmo sites, which have shown significant recovery in biomass, exhibit greater variability. This is due to the presence of larger fish schools that create substantial variation within transects.

Why are not all sites in Cabo Pulmo recovering to similarly high levels? The issue lies in the scale at which we assess regional health. The location of surveys, such as more or less exposed areas, becomes crucial. This variability is important to consider. Consequently, biomass distributions are skewed to the left, and recovery is reflected in the right tail of the biomass distribution.

Let’s now see how the index works in Cabo Pulmo and La Paz.

Code
# Calculate the mean biomass and standard error

nrsi <-  fish %>% 
    filter(reef %in% more_sites) %>% 
  mutate(index_levels = case_when(
    str_detect(trophic_level_f, "4-4.5") ~ "UTL", 
    str_detect(trophic_level_f, "2-2.5") ~ "LTL", 
    TRUE ~ "CTL"
  ), 
  protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>% 
  mutate(degree = round(latitude)) %>% 
  group_by(year, reef, region, degree, latitude, longitude, depth2, index_levels, transect) %>% 
  summarise(biomass = sum(biomass, na.rm = TRUE)) %>% 
  group_by(year, reef, region, index_levels, degree, latitude, longitude) %>% 
  summarise(biomass = mean(biomass, na.rm = TRUE)) %>% 
  group_by(year, reef, region, degree, latitude, longitude) %>% 
  mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>% 
  select(-biomass) %>%
  pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
  mutate(
    UTL = if_else(is.na(UTL), 0, UTL),  # Replace NA with 0 for UTL calculations
    LTL = if_else(is.na(LTL), 0, LTL),  # Replace NA with 0 for LTL calculations
    CTL = if_else(is.na(CTL), 0, CTL),  # Replace NA with 0 for CTL calculations
    nrsi = case_when(
      LTL > UTL + CTL ~ UTL / (UTL + CTL),  # Condition 1: Use only UTL if LTL is greater than UTL + CTL
      TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL)  # Condition 2: Compute as normal otherwise
    )
  ) 

nrsi %>% 
  group_by(year, region) %>% 
  summarise(nrsi = mean(nrsi)) %>% 
  ggplot(aes(x=year, y=nrsi)) +
  geom_point() +
    facet_grid(~region) 

If we want to compare these two regions, we can use the bootstrapping technique by resampling the averages NRSI values. This way we obtain the following graph:

Code
resample_mean <- function(df, n = 9999) {
  replicate(n, {
    sampled <- df[sample(nrow(df), replace = TRUE), ]
    mean((sampled$nrsi), na.rm = TRUE)
  })
}


resampled_prod <- nrsi %>%
  group_by(region) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(region, ResampledMeans, Median)




toplot <- resampled_prod %>%
  unnest(ResampledMeans) 


ggplot(toplot, aes(x = ResampledMeans, fill = region)) +
  geom_density(binwidth = 0.1, alpha = 0.7, position = 'identity') +
  geom_vline(data = resampled_prod, aes(xintercept = Median, color = region), linetype = "dashed", size = 1) +
  labs(title = "Distribution of Resampled Means by Protection Group",
       x = "Resampled Means",
       y = "Frequency")

Or by year:

Code
resample_mean <- function(df, n = 9999) {
  replicate(n, {
    sampled <- df[sample(nrow(df), replace = TRUE), ]
    mean((sampled$nrsi), na.rm = TRUE)
  })
}


resampled_prod <- nrsi %>%
  group_by(year, region) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(region, ResampledMeans, Median)

toplot <- resampled_prod %>%
  unnest(ResampledMeans) 

ggplot(toplot, aes(y = factor(year), x = Median, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(x = "NRSI", y = "") +
  scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
                       values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
  xlim(-0.8,0.8) +
  facet_grid(~region) +
  theme_bw() +
  theme(legend.position = "", 
        
        axis.text.x = element_text(angle = 90, vjust = .5))  

We can also explore within the Cabo Pulmo area, sites that are inside vs the ones that are outside the protection polygon:

Code
nrsi <-  fish %>% 
  mutate(index_levels = case_when(
    str_detect(trophic_level_f, "4-4.5") ~ "UTL", 
    str_detect(trophic_level_f, "2-2.5") ~ "LTL", 
    TRUE ~ "CTL"
  ), 
  protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>% 
  mutate(degree = round(latitude)) %>% 
  filter(region == "Cabo Pulmo") %>% 
  group_by(year, reef, protection, degree, latitude, longitude, depth2, index_levels, transect) %>% 
  summarise(biomass = sum(biomass, na.rm = TRUE)) %>% 
  group_by(year, reef, protection, index_levels, degree, latitude, longitude) %>% 
  summarise(biomass = mean(biomass, na.rm = TRUE)) %>% 
  group_by(year, reef, protection, degree, latitude, longitude) %>% 
  mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>% 
  select(-biomass) %>%
  pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
  mutate(
    UTL = if_else(is.na(UTL), 0, UTL),  # Replace NA with 0 for UTL calculations
    LTL = if_else(is.na(LTL), 0, LTL),  # Replace NA with 0 for LTL calculations
    CTL = if_else(is.na(CTL), 0, CTL),  # Replace NA with 0 for CTL calculations
    nrsi = case_when(
      LTL > UTL + CTL ~ UTL / (UTL + CTL),  # Condition 1: Use only UTL if LTL is greater than UTL + CTL
      TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL)  # Condition 2: Compute as normal otherwise
    )
  ) 

nrsi %>% 
  select(reef, protection) %>% 
  unique()
# A tibble: 247 × 6
# Groups:   year, reef, protection, degree, latitude, longitude [247]
    year degree latitude longitude reef                protection     
   <dbl>  <dbl>    <dbl>     <dbl> <chr>               <fct>          
 1  1999     23     23.5     -109. BAJO_CABO_PULMO     Fully Protected
 2  1999     23     23.4     -109. CANTILES_CABO_PULMO Fully Protected
 3  2000     23     23.4     -109. CANTIL_MEDIO        Fully Protected
 4  2000     23     23.5     -109. MORROS_CABO_PULMO   Fully Protected
 5  2009     23     23.5     -109. BAJO_CABO_PULMO     Fully Protected
 6  2009     23     23.4     -109. BARRA_PRIMERA       Fully Protected
 7  2009     23     23.4     -109. BLEDITO             Not Protected  
 8  2009     23     23.4     -109. CANTILES_CABO_PULMO Fully Protected
 9  2009     23     23.4     -109. CANTIL_MEDIO        Fully Protected
10  2009     23     23.4     -109. CASITAS             Fully Protected
# ℹ 237 more rows
Code
resample_mean <- function(df, n = 9999) {
  replicate(n, {
    sampled <- df[sample(nrow(df), replace = TRUE), ]
    mean((sampled$nrsi), na.rm = TRUE)
  })
}


resampled_prod <- nrsi %>%
  filter(year > 2015) %>% 
  group_by(protection) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(protection, ResampledMeans, Median)

toplot <- resampled_prod %>%
  unnest(ResampledMeans) 

p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
  scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
                       values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
  xlim(-0.8,0.8) +
  #facet_grid(~protection) +
  theme_bw() +
  theme(legend.position = "", 
        axis.text.x = element_text(angle = 90, vjust = .5))  

p2 <- fish %>% 
  mutate(protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>% 
  filter(region == "Cabo Pulmo") %>% 
  select(reef, protection, latitude, longitude) %>% 
  unique() %>% 
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>% 
  ggplot() +
  geom_sf(data = dafishr::mx_coastline, fill = NA) +
  geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
  geom_sf(aes(col=protection)) +
  coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
  labs(col = "") +
  theme_bw() +
  theme(legend.position = "top")

library(patchwork)

p2 + p1 

As there is a different number of reefs between inside and outside the area, we can try random sampling 3 reefs within the park to compare with the 3 outside to see if sampling size actually influence this difference, and we can see that actually the sites have consistently higher NRSI values.

Code
set.seed(123)

sampled_fully_protected_reefs <- 
bind_rows(
  nrsi %>%
  ungroup() %>% 
  filter(protection == "Fully Protected") %>%
  distinct(reef) %>%
  sample_n(3) %>%
  inner_join(nrsi, by = "reef"),
  nrsi %>% 
    filter(protection == "Not Protected")
)


resampled_prod <- sampled_fully_protected_reefs %>%
  group_by(protection) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(protection, ResampledMeans, Median)

toplot <- resampled_prod %>%
  unnest(ResampledMeans) 

p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
  scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
                       values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
  xlim(-0.8,0.8) +
  #facet_grid(~protection) +
  theme_bw() +
  theme(legend.position = "", 
        axis.text.x = element_text(angle = 90, vjust = .5))  



p2 <- sampled_fully_protected_reefs %>%  
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>% 
  ggplot() +
  geom_sf(data = dafishr::mx_coastline, fill = NA) +
  geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
  geom_sf(aes(col=protection)) +
  coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
  labs(col = "") +
  theme_bw() +
  theme(legend.position = "top")

p1+p2

Code
set.seed(111)

sampled_fully_protected_reefs <- 
bind_rows(
  nrsi %>%
  ungroup() %>% 
  filter(protection == "Fully Protected") %>%
  distinct(reef) %>%
  sample_n(3) %>%
  inner_join(nrsi, by = "reef"),
  nrsi %>% 
    filter(protection == "Not Protected")
)


resampled_prod <- sampled_fully_protected_reefs %>%
  group_by(protection) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(protection, ResampledMeans, Median)

toplot <- resampled_prod %>%
  unnest(ResampledMeans) 

p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
  scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
                       values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
  xlim(-0.8,0.8) +
  #facet_grid(~protection) +
  theme_bw() +
  theme(legend.position = "", 
        axis.text.x = element_text(angle = 90, vjust = .5))  



p2 <- sampled_fully_protected_reefs %>%  
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>% 
  ggplot() +
  geom_sf(data = dafishr::mx_coastline, fill = NA) +
  geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
  geom_sf(aes(col=protection)) +
  coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
  labs(col = "") +
  theme_bw() +
  theme(legend.position = "top")

p1+p2

Code
set.seed(341)

sampled_fully_protected_reefs <- 
bind_rows(
  nrsi %>%
  ungroup() %>% 
  filter(protection == "Fully Protected") %>%
  distinct(reef) %>%
  sample_n(3) %>%
  inner_join(nrsi, by = "reef"),
  nrsi %>% 
    filter(protection == "Not Protected")
)


resampled_prod <- sampled_fully_protected_reefs %>%
  group_by(protection) %>%
  nest() %>%
  mutate(
    ResampledMeans = map(data, ~resample_mean(.x)),
    Median = map_dbl(ResampledMeans, median)
  ) %>%
  select(protection, ResampledMeans, Median)

toplot <- resampled_prod %>%
  unnest(ResampledMeans) 

p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1) +
  geom_vline(xintercept = 0, linetype = 2) +
  labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
  scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
                       values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
  xlim(-0.8,0.8) +
  #facet_grid(~protection) +
  theme_bw() +
  theme(legend.position = "", 
        axis.text.x = element_text(angle = 90, vjust = .5))  




p2 <- sampled_fully_protected_reefs %>%  
  st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>% 
  ggplot() +
  geom_sf(data = dafishr::mx_coastline, fill = NA) +
  geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
  geom_sf(aes(col=protection)) +
  coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
  labs(col = "") +
  theme_bw() +
  theme(legend.position = "top")

p1+p2

NRSI and environmental conditions

We inspect if NRSI is influenced by environmental conditions other than the protection level of the reefs.

Code
#### ANALYSIS OF ENVIORNMENTAL ASSOCIATION WITH NRSI
fish <- read_csv("data/fish_database_submission.csv")

npp <- read_rds("data/LTEM_reefs_chl_yearly_average.RDS") %>% 
  select(reef = Reef, year, mean_chl) %>% 
  group_by(year, reef) %>% 
  summarise(mean_chl = mean(mean_chl))

sst <- read_rds("data/LTEM_reefs_SST_yearly_average.RDS") %>% 
  select(reef = Reef, year, mean_sst) %>% 
  group_by(year, reef) %>% 
  summarise(mean_sst = mean(mean_sst))


nrsi <-  fish %>% 
  filter(year > 2009) %>% 
  mutate(index_levels = case_when(
    str_detect(trophic_level_f, "4-4.5") ~ "UTL", 
    str_detect(trophic_level_f, "2-2.5") ~ "LTL", 
    TRUE ~ "CTL"
  ), 
  protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fishing Refuge", "Fully Protected"))) %>% 
  mutate(degree = round(latitude)) %>% 
  group_by(year, region, reef, protection, degree, latitude, longitude, depth2, index_levels, transect) %>% 
  summarise(biomass = sum(biomass, na.rm = TRUE)) %>% 
  group_by(year, region, reef, protection, index_levels, degree, latitude, longitude) %>% 
  summarise(biomass = mean(biomass, na.rm = TRUE)) %>% 
  group_by(year, region, reef, protection, degree, latitude, longitude) %>% 
  mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>% 
  select(-biomass) %>%
  pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
  mutate(
    UTL = if_else(is.na(UTL), 0, UTL),  # Replace NA with 0 for UTL calculations
    LTL = if_else(is.na(LTL), 0, LTL),  # Replace NA with 0 for LTL calculations
    CTL = if_else(is.na(CTL), 0, CTL),  # Replace NA with 0 for CTL calculations
    nrsi = case_when(
      LTL > UTL + CTL ~ UTL / (UTL + CTL),  # Condition 1: Use only UTL if LTL is greater than UTL + CTL
      TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL)  # Condition 2: Compute as normal otherwise
    )
  ) %>% 
  left_join(., npp, by = c("year", "reef")) %>% 
  left_join(., sst, by = c("year", "reef")) %>% 
  filter(!is.na(mean_chl))

nrsi_data <- nrsi %>%
  mutate(protection = factor(protection)) 

First, we run a beta regression model using latitude, sea surface temperature, and chlorophyll a content, all obtained through remote sensing data using year averages.

Code
# Load necessary packages
library(dplyr)
library(betareg)

# Prepare data
nrsi_data <- nrsi_data %>%
  filter(!is.na(mean_sst)) %>%  # Remove rows with NA in mean_sst
  mutate(protection = as.factor(protection))

nrsi_data$transformed_index = (nrsi_data$nrsi + 1) / 2

nrsi_data <- nrsi_data %>% filter(transformed_index > 0)

# Fit beta regression model including environmental variables
beta_model <- betareg(transformed_index ~ protection + 
                        latitude + mean_chl + mean_sst, data = nrsi_data)

# Summary of the model
summary(beta_model)

Call:
betareg(formula = transformed_index ~ protection + latitude + mean_chl + 
    mean_sst, data = nrsi_data)

Standardized weighted residuals 2:
    Min      1Q  Median      3Q     Max 
-5.3189 -0.5823  0.0503  0.6828  3.9777 

Coefficients (mean model with logit link):
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                0.99522    1.46724   0.678    0.498    
protectionNot Protected   -0.06650    0.06426  -1.035    0.301    
protectionFully Protected  0.68357    0.08304   8.232  < 2e-16 ***
protectionFishing Refuge   0.06356    0.09979   0.637    0.524    
latitude                  -0.01960    0.02369  -0.827    0.408    
mean_chl                  -0.36039    0.08826  -4.083 4.44e-05 ***
mean_sst                  -0.02758    0.03939  -0.700    0.484    

Phi coefficients (precision model with identity link):
      Estimate Std. Error z value Pr(>|z|)    
(phi)   6.7000     0.3076   21.78   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

Type of estimator: ML (maximum likelihood)
Log-likelihood: 296.6 on 8 Df
Pseudo R-squared: 0.1822
Number of iterations: 17 (BFGS) + 2 (Fisher scoring) 

The protection levels (Fully Protected) is singificantly higher and we found no significant effects for environmental parameters except for chlorophyll.

Such relationship is interesting so we explore it further by plotting NRSI against chla.

Code
nrsi_data %>% 
  ggplot(aes(x=nrsi, y=mean_chl, col = protection)) +
  geom_point()

It is evident that while a relationship seems to exist, it is because Fully Protected are much more common in oligotrophic condition. We can explore this further.

Code
ggplot(nrsi_data, aes(x=nrsi, y=mean_chl, col = protection)) +
  geom_point() +
  facet_grid(~protection) +
  geom_smooth(method = "lm")

There is a significant difference between chla values and protection levels, particularly for Fully Protected areas.

Code
anova_result <- aov(mean_chl ~ protection, data = nrsi_data)
summary(anova_result)
             Df Sum Sq Mean Sq F value Pr(>F)    
protection    3  20.42   6.807   69.63 <2e-16 ***
Residuals   832  81.34   0.098                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
# Perform Tukey's HSD test
tukey_results <- TukeyHSD(anova_result)

# View results
print(tukey_results)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = mean_chl ~ protection, data = nrsi_data)

$protection
                                        diff         lwr         upr     p adj
Not Protected-Lightly Protected   -0.1739941 -0.24062308 -0.10736515 0.0000000
Fully Protected-Lightly Protected -0.4372828 -0.51718334 -0.35738224 0.0000000
Fishing Refuge-Lightly Protected  -0.0484950 -0.15714918  0.06015917 0.6592342
Fully Protected-Not Protected     -0.2632887 -0.33893439 -0.18764295 0.0000000
Fishing Refuge-Not Protected       0.1254991  0.01993441  0.23106382 0.0122038
Fishing Refuge-Fully Protected     0.3887878  0.27438242  0.50319314 0.0000000
Code
# Summary statistics for mean_chl by protection level
summary_stats <- nrsi_data %>%
  group_by(protection) %>%
  summarise(
    mean_mean_chl = mean(mean_chl, na.rm = TRUE),
    median_mean_chl = median(mean_chl, na.rm = TRUE),
    sd_mean_chl = sd(mean_chl, na.rm = TRUE),
    n = n()
  )

# Display summary statistics
summary_stats
# A tibble: 4 × 5
  protection        mean_mean_chl median_mean_chl sd_mean_chl     n
  <fct>                     <dbl>           <dbl>       <dbl> <int>
1 Lightly Protected         0.904           0.802       0.363   254
2 Not Protected             0.730           0.671       0.353   343
3 Fully Protected           0.467           0.496       0.126   169
4 Fishing Refuge            0.856           0.865       0.192    70

This suggest that the significant effect of chla is mostly due to the spatial position of fully protected areas and not necessarily to the influence of chla to the NRSI.